package it.uniroma2.exp.tools;

import it.uniroma2.dtk.dt.DT;
import it.uniroma2.dtk.dt.GenericDT;
import it.uniroma2.util.math.ArrayMath;
import it.uniroma2.util.tree.Tree;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Command-line tool that rewrites an SVM-style file, adding to the vector of
 * each line the distributed vectors of the preterminal nodes of that line's tree.
 * <p>
 * Each input line is expected to contain, in this order, the markers
 * <code>|BT|</code>, <code>|ET|</code> and <code>|EV|</code>: the text between
 * <code>|BT|</code> and <code>|ET|</code> is parsed as a Penn-style tree, the text
 * between <code>|ET|</code> and <code>|EV|</code> as a sparse
 * <code>index:value</code> vector with 1-based indices. Everything up to
 * <code>|ET|</code> is copied to the output unchanged; anything after
 * <code>|EV|</code> is dropped.
 */
public class Lexicalizer {

	/** Marker that precedes the tree in an input line. */
	private static final String BEGIN_TREE = "|BT|";
	/** Marker that closes the tree and precedes the vector. */
	private static final String END_TREE = "|ET|";
	/** Marker that closes the vector. */
	private static final String END_VECTOR = "|EV|";

	/**
	 * Reads the given file line by line and writes the lexicalized version next
	 * to it. The output name is the input name with every <code>.svm</code>
	 * replaced by <code>_lex.svm</code>; if the input name contains no
	 * <code>.svm</code>, <code>_lex.svm</code> is appended so that the input file
	 * is never overwritten.
	 * <p>
	 * Any exception raised while processing is printed to standard error via
	 * its stack trace; both files are closed in every case.
	 *
	 * @param args
	 * 0: svm file
	 * 1: vector size
	 * 2: class of the composition operation; the value is passed to
	 *    {@link Class#forName(String)}, so it must be a fully-qualified class name
	 * 3: [random offset] (defaults to 0)
	 */
	public static void main(String[] args) {
		if (args.length < 3) {
			System.err.println("Usage: Lexicalizer <svm file> <vector size> <composition class> [random offset]");
			return;
		}
		try {
			Lexicalizer lex = new Lexicalizer();
			File original = new File(args[0]);
			String processedFileName = original.getName().replace(".svm", "_lex.svm");
			if (processedFileName.equals(original.getName())) {
				// No ".svm" in the name: without this, output == input and the
				// FileWriter below would truncate the file we are about to read.
				processedFileName = processedFileName + "_lex.svm";
			}
			File processed = new File(original.getParentFile(), processedFileName);
			int vectorSize = Integer.parseInt(args[1]);
			int offset = args.length > 3 ? Integer.parseInt(args[3]) : 0;
			DT dt = new GenericDT(offset, vectorSize, true, true, Class.forName(args[2]));
			BufferedReader in = new BufferedReader(new FileReader(original));
			try {
				BufferedWriter out = new BufferedWriter(new FileWriter(processed));
				try {
					int computed = 0;
					String line = in.readLine();
					while (line != null) {
						out.write(lex.lexicalizeLine(line, computed + 1, vectorSize, dt));
						out.newLine();
						computed++;
						// Progress: one dot per line, the running count every 100 lines.
						System.out.print(computed%100 == 0 ? computed+"\n" : ".");
						line = in.readLine();
					}
				} finally {
					out.close();
				}
			} finally {
				in.close();
			}
			System.out.println();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Builds the output line for one input line.
	 *
	 * @param line the raw input line
	 * @param lineNumber 1-based line number, used only in error messages
	 * @param vectorSize length of the dense vector
	 * @param dt the distributed-tree encoder used for the preterminals
	 * @return the input up to <code>|ET|</code>, followed by the lexicalized vector and <code>|EV|</code>
	 * @throws IllegalArgumentException if the three markers are missing or out of order
	 * @throws Exception whatever tree parsing, vector parsing or {@link #lexicalizeDT} throws
	 */
	private String lexicalizeLine(String line, int lineNumber, int vectorSize, DT dt) throws Exception {
		int beginTree = line.indexOf(BEGIN_TREE);
		int endTree = line.indexOf(END_TREE);
		int endVector = line.indexOf(END_VECTOR);
		if (beginTree < 0
				|| endTree < beginTree + BEGIN_TREE.length()
				|| endVector < endTree + END_TREE.length()) {
			throw new IllegalArgumentException("Line " + lineNumber
					+ ": expected the markers |BT|, |ET| and |EV| in this order: " + line);
		}
		String treeString = line.substring(beginTree + BEGIN_TREE.length(), endTree).trim();
		Tree t = Tree.fromPennTree(treeString);
		String vectorString = line.substring(endTree + END_TREE.length(), endVector).trim();
		double[] vector = stringToVector(vectorString, vectorSize);
		return line.substring(0, endTree)
				+ "|ET| " + vectorToSvmString(lexicalizeDT(vector, t, dt)) + " |EV|";
	}

	/**
	 * Adds to the given vector the vector returned by <code>dt.dt()</code> for
	 * each preterminal of the tree.
	 *
	 * @param dtVector the starting vector; it is handed directly to
	 *        <code>ArrayMath.sum</code> as first operand
	 * @param tree the tree whose preterminals are added
	 * @param dt the encoder applied to each preterminal
	 * @return the accumulated vector (<code>dtVector</code> itself if the tree has no preterminals)
	 * @throws Exception if the encoder or the sum fails
	 */
	public double[] lexicalizeDT(double[] dtVector, Tree tree, DT dt) throws Exception {
		double[] result = dtVector;
		for (Tree pt : extractPreterminals(tree)) {
			//We use dt() instead of dtf() to avoid errors if lambda = 0
			result = ArrayMath.sum(result, dt.dt(pt));
		}
		return result;
	}

	/**
	 * Collects, in depth-first left-to-right order, every node that has exactly
	 * one child and whose child is a terminal.
	 *
	 * @param tree the root of the (sub)tree to visit
	 * @return a new mutable list with the matching nodes; empty if there are none
	 */
	public List<Tree> extractPreterminals(Tree tree) {
		List<Tree> found = new ArrayList<Tree>();
		collectPreterminals(tree, found);
		return found;
	}

	/** Appends the preterminals of <code>tree</code> to <code>found</code>, visiting children in order. */
	private void collectPreterminals(Tree tree, List<Tree> found) {
		if (tree.getChildren().size() == 1 && tree.getChildren().get(0).isTerminal()) {
			found.add(tree);
			return;
		}
		for (Tree child : tree.getChildren()) {
			collectPreterminals(child, found);
		}
	}

	/**
	 * Serializes a dense vector as space-separated <code>index:value</code>
	 * pairs with 1-based indices, omitting the components equal to zero.
	 *
	 * @param vector the dense vector
	 * @return the sparse representation; the empty string if every component is zero
	 */
	public static String vectorToSvmString(double[] vector) {
		StringBuilder result = new StringBuilder();
		for (int i=0; i<vector.length; i++) {
			if (vector[i] != 0) {
				if (result.length() > 0)
					result.append(' ');
				result.append(i+1).append(':').append(vector[i]);
			}
		}
		return result.toString();
	}

	/**
	 * Parses whitespace-separated <code>index:value</code> pairs (1-based
	 * indices) into a dense vector; components not mentioned stay at zero.
	 * An empty or blank string yields an all-zero vector, which makes this the
	 * inverse of {@link #vectorToSvmString(double[])} also for zero vectors.
	 *
	 * @param line the sparse representation, or <code>null</code>
	 * @param size length of the resulting vector
	 * @return the dense vector, or <code>null</code> if <code>line</code> is <code>null</code>
	 * @throws Exception if a pair is malformed or an index falls outside <code>1..size</code>
	 */
	public static double[] stringToVector(String line,int size) throws Exception {
		if (line == null)
			return null;
		double[] array = new double[size];
		String trimmed = line.trim();
		if (trimmed.length() == 0)
			return array;
		for (String pair : trimmed.split("\\s+")) {
			String[] indexAndValue = pair.split(":");
			array[Integer.parseInt(indexAndValue[0])-1] = Double.parseDouble(indexAndValue[1]);
		}
		return array;
	}
}
